## *********************************************
##    C1 Alternative Measures of GDP Pressures       
##            Hao Zhang & Ye Zhang
##                 2025/7/24
## *********************************************

# load packages
library(readstata13)
library(tidyverse)
library(doBy)
library(haven)
library(lfe)
library(stargazer)

# construct gdp pressure based on nearest city in the same province
setwd("../raw data")
distance <- read.dta13("city spatial distance.dta")
distance[distance == 0] <- NA
citycode <- distance %>% select(cityid, cityseries) %>% unique() %>% 
  filter(substr(cityid, 5, 6) == "00") %>% mutate(citycode = substr(cityid, 0, 4)) %>% 
  select(-cityid) %>% mutate(citynumber = tolower(cityseries)) %>% select(-cityseries)

# change from wide to long format
long <- distance %>% 
  pivot_longer(
    cols = `city1`:`city620`, 
    names_to = "citynumber",
    values_to = "distance"
  )

# sift out closest city in the same province pairs
long$cityseries <- tolower(long$cityseries)
long <- left_join(long, citycode, by = "citynumber") %>% drop_na() 
citycode <- citycode %>% rename(cityseries = citynumber)
long <- left_join(long, citycode, by = "cityseries") %>% drop_na()
long <- long %>% filter(substr(citycode.x, 0, 2) == substr(citycode.y, 0, 2))
long1 <- long %>% group_by(citycode.x) %>% slice(which.min(distance)) %>% select(citycode.x, citycode.y)

# try second nearest city
long$citycodexy <- paste0(long$citycode.x, long$citycode.y, sep = "")
long1$citycodexy <- paste0(long1$citycode.x, long1$citycode.y, sep = "")
'%!in%' <- function(x,y)!('%in%'(x,y))
long <- long %>% filter(citycodexy %!in% long1$citycodexy)
long2 <- long %>% group_by(citycode.x) %>% slice(which.min(distance)) %>% select(citycode.x, citycode.y)
long1 <- long1 %>% select(-citycodexy)

# extract gdp growth pressure
load("../analysis data/firm analysis.RData")
city <- firm %>% select(gdpgrowth, citycode, year) %>% unique() %>% drop_na()
long1 <- long1 %>% rename(citycode = citycode.x)
long1$citycode <- as.numeric(long1$citycode)
result <- left_join(city, long1, by = "citycode")
city <- city %>% rename(citycode.y.y = citycode)
city$citycode.y.y <- as.character(city$citycode.y.y)
result <- left_join(result, city, by = c("citycode.y.y", "year"))

long2 <- long2 %>% rename(citycode = citycode.x)
long2$citycode <- as.numeric(long2$citycode)
result <- left_join(result, long2, by = "citycode")
city <- city %>% rename(citycode.y.y.y = citycode.y.y)
city$citycode.y.y.y <- as.character(city$citycode.y.y.y)
result <- left_join(result, city, by = c("citycode.y.y.y", "year"))

result$gdp_pressure_nearest1 <- result$gdpgrowth.y - result$gdpgrowth.x
result$gdp_pressure_nearest2 <- (result$gdpgrowth.y + result$gdpgrowth - 2 * result$gdpgrowth.x)/2
result <- result %>% select(citycode, year, gdp_pressure_nearest1, gdp_pressure_nearest2)

# run regression again
firm <- left_join(firm, result, by = c("citycode", "year"))
cor(firm$gdp_pressure, firm$gdp_pressure_nearest1, use = "complete.obs")
cor(firm$gdp_pressure, firm$gdp_pressure_nearest2, use = "complete.obs")

# run inverse hyperbolic sine
ihs <- function(x) {
  y <- log(x + sqrt(x ^ 2 + 1))
  return(y)
}

# unemployment insurance
m1 <- felm(insurance_dummy ~ gdp_pressure_nearest1 + ihs(employment) + 
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm)

m2 <- felm(insurance_dummy ~ gdp_pressure_nearest2 + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm)

# medical insurance
m3 <- felm(medical_dummy ~ gdp_pressure_nearest1 + ihs(employment) + 
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm)

m4 <- felm(medical_dummy ~ gdp_pressure_nearest2 + ihs(employment) +
             ihs(wage) + ihs(output1) + ihs(profit) +  
             log(FDI + 1) + gdpgrowth + log(gdp/population) +
             ihs(deficit/gdp) +  ihs(capacity) + tax | 
             frdm + year + educ + ethnicity  + gender + 
             begin + connection + seq | 0 | citycode, data = firm)

# output regression table
stargazer(m1, m2, m3, m4, title = "Interjurisdictional Competition and Social Insurances (Alternative Measures)", 
          dep.var.labels = c("Unemployment", "Unemployment", "Health and Pension", "Health and Pension"),
          omit.stat = c("rsq", "adj.rsq", "ser"), 
          font.size = "small", 
          add.lines = list(c("Mayor Characteristics", "Y", "Y", "Y", "Y"),
                           c("Firm Characteristics", "Y", "Y", "Y", "Y"), 
                           c("Firm Fixed Effects", "Y", "Y", "Y", "Y"), 
                           c("Year Fixed Effects", "Y", "Y", "Y", "Y")))
